/*	This program creates a working dataset for event studies using the retirement sample */

***** Directories and output file name
* dir_raw is used further down to import the minimum contribution base CSV;
* dir_clean holds the input sample and, together with dataname, the final saved file.
local dir_raw   "~/Dropbox/Retirement gaming/raw"
local dir_clean "~/Dropbox/Retirement gaming/clean"
local dataname  "retirsample_bcw.dta"

clear all
use "`dir_clean'/retirementsample.dta", clear

*** Sample restrictions and outliers ***

* Keep only the retirement contribution fund "industria & comercio" (aportaci equal to 1)
keep if aportaci==1

* Drop people in early retirement regimes:
*   vf 103     = people working while already retired
*   vf 97 / 98 = people reported with no service to the firm
drop if inlist(vf, 97, 98, 103)

* Correct start of benefits when it happens before retirement request date
* The three replaces run in sequence, so each one sees the result of the previous one.
* NOTE(review): tm(2001m12) is treated throughout as a value of first_receipt that must not be
*   used as a benefit start date - presumably a placeholder or censoring month; confirm.
* NOTE(review): Stata treats missing as larger than any number. Hence month_benefits<month_request
*   is also true when month_request is missing, and first_receipt!=tm(2001m12) is also true when
*   first_receipt is missing. In those cases a non-missing month_benefits can be overwritten with a
*   missing value, and such rows are removed later by the keep on month_benefits. Confirm intended.
* (1) fill a missing benefit start with the first receipt month
replace month_benefits=first_receipt if month_benefits==. & first_receipt!=. & first_receipt!=tm(2001m12)
* (2) benefit start earlier than the request date: use the first receipt month instead
replace month_benefits=first_receipt if month_benefits<month_request & first_receipt!=tm(2001m12)
* (3) same situation but first receipt is 2001m12: fall back to the request month
replace month_benefits=month_request if month_benefits<month_request & first_receipt==tm(2001m12)

* First retirement observation
* retirnow marks the month in which t equals the benefit start month;
* tretir is the earliest such month for each person i (missing if none is in the data).
g retirnow = (t==month_benefits)
cap drop aux
bysort i: egen tretir = min(cond(retirnow==1, t, .))
format tretir %tm

* postretir = 1 for months strictly after tretir (0 when tretir is missing)
g postretir = (t>tretir)
replace retirnow = 0 if postretir==1

* retir_insample = 1 when the person's retirement month is observed in the data
bysort i: egen retir_insample = max(retirnow)

* Drop people with employment after retirement:
* anyone with positive, non-missing earnings in a post-retirement month is removed entirely
bysort i: egen iflag = max(W>0 & W<. & postretir==1)
drop if iflag==1
drop iflag

* Employment-type indicators; keep only self-employed and employed observations
g self_empl = (status_1==1)
g empl      = (status_3==1)
keep if self_empl | empl

* Drop observations with missing or zero reported earnings
drop if W==. | W==0

* Keep only salaried work observations
keep if tipREM==1

* Drop earnings outliers (same outlier thresholds used in our main sample).
* The summary is kept for the log only: the cutoffs below are fixed numbers, not r() results.
sum W, det
drop if W<3.604606 | W>81.31013

* Drop outliers firm size self-employed
* The cutoff is the 95th percentile of ndep among self-employed observations, taken from r(p95).
* NOTE(review): missing values compare as larger than any number, so self-employed observations
*   with missing ndep are dropped here as well - confirm this is intended.
sum ndep if self_empl==1, det
drop if ndep>r(p95) & self_empl==1

* Cohorts in sample
* cohort is the calendar year of Fnac; the sample keeps birth months 1941m4 through 1971m3.
g cohort=yofd(Fnac)
keep if birth_month>=tm(1941m4) & birth_month<tm(1971m4)

* Outliers retirement age 
* Ages below 57 are set to missing; the observations themselves are kept.
replace age_benefits=. if  age_benefits<57
replace age_request=. if age_request<57
 
* Keep people we observe retire
* i.e. observations with a non-missing benefit start month
keep if month_benefits!=.


** Create additional variables **

* Compute benefit calculation window as 10 yrs before retirement
* bcw_start is the monthly date 120 months before the benefit start month.
g bcw_start = month_benefits - 120
format bcw_start %tm

* nr. months relative to start of BCW
* 0 in the month the BCW starts, negative before it, positive after it.
g timemonths_bcw = t - bcw_start 

* firm size
* Bins ndep into six size classes (codes 0-5, value label size_cat).
* Stata orders missing values above every number, so the open-ended top bin has to exclude
* them explicitly; without the guard, observations with unknown ndep would be coded as
* "Large 250 plus". With the guard they keep a missing ndep_cat.
label define size_cat 0 "Micro less than 5 " 1 "Micro 5-9" 2 "Small 10-19" 3 "Small 20-49" 4 "Medium 50-249" 5 "Large 250 plus"
g 		ndep_cat=0 if ndep <5 // micro 1 less than 5
replace ndep_cat=1 if ndep>=5  & ndep<10 // micro 2
replace ndep_cat=2 if ndep>=10 & ndep<20 // small 1
replace ndep_cat=3 if ndep>=20 & ndep<50 // small 2
replace ndep_cat=4 if ndep>=50 & ndep<250 // medium
replace ndep_cat=5 if ndep>=250 & ndep<. // large; ndep<. keeps missing firm size out of this bin
label values ndep_cat size_cat

* Calendar year of the monthly date t
cap drop year
g year=yofd(dofm(t))

*JOB CHANGES
* NOTE(review): j is presumably the job/firm identifier of the observation - confirm.
* Declare the panel (person i, month t) so that the lag operator can be used
xtset i t
sort i t
cap drop auxj
* auxj = j in the previous calendar month (missing when that month is not in the data)
bys i: gen auxj=l1.j
sort i t
* When the lag is missing, fall back to j in the previous row of the same person
replace auxj=j[_n-1] if auxj==. & i[_n]==i[_n-1]
* jobchange = 1 when j differs from the previous j, 0 when equal, missing when there is no previous j
g jobchange=auxj!=j if auxj!=. 
* Any job change within person and agedisc cell (agedisc is not created in this file)
bys i agedisc: egen jobchange_atage=max(jobchange) 
replace jobchange_atage=. if jobchange==.
* Any job change for the person over all remaining observations
bys i: egen anyjobchange=max(jobchange) 
drop auxj

* 2-digit CIIU
* ciiu2 is the number formed by the first two characters of the string form of ciiu.
* NOTE(review): if ciiu is stored as a number, codes whose first digit is 0 lose that leading
*   zero in tostring, so the first two characters would not be the 2-digit division.
*   Confirm how ciiu is stored and how many digits it has.
cap drop aux
tostring ciiu, g(aux)
g ciiu2=substr(aux,1,2)
destring ciiu2, replace
drop aux


*** Keep observations in the relevant interval around start BCW ***
* Months -60 through 95 relative to the start of the BCW, i.e. event years -5 through 7
keep if timemonths_bcw>=-60 & timemonths_bcw<96

*** Years relative to start BCW
* Event year y covers months 12*y up to (but excluding) 12*(y+1),
* which within the window kept above is the floor of months/12.
g time_bcw = floor(timemonths_bcw/12)

* Age in years at the start of the BCW
g age_bcwstart = (bcw_start-birth_month)/12


* Drop if simultaneous employment and self-employment
* For each status: _mth = the person has that status in month t,
*                  _any = the person has that status at some point in the sample
foreach v in self_empl empl {
	bysort i t: egen `v'_mth = max(`v')
	bysort i: egen `v'_any = max(`v')
}

* iinboth = 1 for people who hold both statuses in the same month at least once
bysort i: egen iinboth = max(self_empl_mth==1 & empl_mth==1)
tab iinboth
* These individuals are removed entirely, not only in the overlapping months
drop if iinboth==1
drop iinboth

* Determine each person's max number of months in sample (depending on cohort)
* Baseline is 156 months. It is reduced by the distance, in months, between birth_month and
* the first / last birth month treated as fully covered (April of firstyrfull, March of lastyrfull).
local firstyrfull = 1996 - 45
local lastyrfull  = 2015 - 57 + 1

g max_months = cond(birth_month < tm(`firstyrfull'm4), 156 - (tm(`firstyrfull'm4) - birth_month), ///
               cond(birth_month > tm(`lastyrfull'm3),  156 - (birth_month - tm(`lastyrfull'm3)), 156))

* Select sample observed employed/self_employed for at least 6 months
*   count_m*  : number of the person's observations with that status
*   prop_m*   : that count as a share of max_months
*   sample_*  : observation has the status and the person has at least 6 such observations
*   isample_* : person-level version of sample_*
foreach grp in empl self_empl {
	bys i: egen count_m`grp' = total(`grp')
	g prop_m`grp' = count_m`grp'/max_months
	g sample_`grp' = (`grp'==1) & (count_m`grp'>=6)
	bys i: egen isample_`grp' = max(sample_`grp')
}

* Person-level indicators of ever being employed / self-employed among remaining observations
bysort i: egen empl_any2 = max(empl)
bysort i: egen self_empl_any2 = max(self_empl)

* Drop from sample of empl those who ever have self_employment
drop if isample_empl==1 & self_empl_any2==1

* Keep only people who belong to at least one of the two samples
keep if isample_empl==1 | isample_self_empl==1
 
 
* Keep relevant variables
* Wildcards keep every variable whose name contains the given text (age, empl, sample, count,
* jobchange, bcw) or starts with it (W, ciiu).
keep i t j year *age* timemonths_bcw  *empl* W*   ben ciiu*  Tipocontr ipc ndep ndep_cat *sample* birth_month *count* cohort max_months Fing Fegr hrsmonth  *jobchange* *bcw*

* Store the working sample in a temporary file; it is re-read and overwritten further down
tempfile retirdata
save `retirdata', replace

*Add Min Contribution Base
* Step 1: list of distinct months t present in the working sample
tempfile time
use `retirdata', clear
keep t 
duplicates drop 
save `time'

* Step 2: read the CSV starting at row 5, taking variable names from row 5
import delimited "`dir_raw'/FICTO UNIPERSONALES.csv", clear rowrange(5) varnames(5)
drop valorbfc aportebps

* mgravado is read as text with comma separators; convert it to a number
destring mgravado, replace ignore(",")

* Monthly date from the month-year text in fvigencia (two-digit years resolved up to 2019)
g t=mofd(date(fvigencia, "MY",2019))
format t %tm
sort t
* Add the sample months that have no row in the CSV, so that they can be filled below
merge 1:1 t using `time'
sort t
tset t

* Fill months without a value from the preceding month
* NOTE(review): with the lag operator the fill may be computed from the values as they were
*   before this replace, in which case only the first missing month after each observed value
*   is filled and longer gaps stay missing - verify that no sample month ends up without a value.
replace mgravado = l.mgravado if mgravado==.
* Row 2 is overwritten with the value in row 1 regardless of whether row 2 was missing
* NOTE(review): looks like a data-specific patch for the start of the series - confirm.
replace mgravado = mgravado[1] if _n==2

* Drop months that appear only in the CSV and not in the sample
drop if _m==1
drop _m fvigencia

* Step 3: attach the monthly value to every observation of the working sample
merge 1:m t using `retirdata'
drop _m

* NOTE(review): dividing by 1000*ipc presumably expresses the base in the same units as W - confirm.
g rficto = mgravado/(1000*ipc)
label var rficto "Minimum contribution base"

drop mgravado 

save `retirdata', replace


********************************************************************************
**** PREPARE DATA FOR ANALYSIS
********************************************************************************

use `retirdata', clear

* Keep people aged at least 50 and under 60 at the start of the BCW
* (observations with missing age at BCW start are dropped as well)
keep if age_bcwstart>=50 & age_bcwstart<60

* order = position of the observation within person i, in time order
bysort i (t): g order = _n


*Broad industries (ciiu1) built from the 2-digit code ciiu2
gen     ciiu1 = 1 if ciiu2<10                                                   // Agriculture and mining
replace ciiu1 = 2 if inrange(ciiu2,10,33) | ciiu2==95                           // Manufacturing
replace ciiu1 = 3 if inrange(ciiu2,35,39)                                       // Energy and waste disposal
replace ciiu1 = 4 if inrange(ciiu2,41,43)                                       // Construction
replace ciiu1 = 5 if inrange(ciiu2,45,47) | inrange(ciiu2,55,56)                // wholesale and retail, restaurants, hotels
replace ciiu1 = 6 if inrange(ciiu2,49,53) | ciiu2==61                           // transport and communications
replace ciiu1 = 7 if inrange(ciiu2,62,82) | ciiu2==96                           // services
replace ciiu1 = 8 if inrange(ciiu2,84,94) | inrange(ciiu2,58,60) | ciiu2==97    // public admin, social and domestic services


* ciiu1 and ciiu2 when first obs
* The person's industry code at the first observation (order equal to 1), copied to all rows;
* it is 0 when that first code is missing. The loop macro is named k so that it cannot be
* confused with the person identifier variable i.
foreach k of numlist 1 2 {
	cap drop aux
	bysort i: egen ciiu`k'_1stobs = max(cond(order==1 & ciiu`k'!=., ciiu`k', 0))
	label var ciiu`k'_1stobs "Industry when 1st obs"
}


// sectors using the classification of bc_rama in ECH
// each dummy is left missing when the 2-digit industry code is missing
g manufacturing     = (ciiu1==2) if ciiu2!=.
g retailhospitality = (ciiu1==5) if ciiu2!=.
g transportenergy   = (ciiu1==6 | ciiu1==3) if ciiu2!=.
g services          = (ciiu1==7 | ciiu1==8 | ciiu1==4) if ciiu2!=. // includes construction not in construction pension system

label var manufacturing     "Manufacturing"
label var retailhospitality "Retail, Restaurants, Hotels"
label var transportenergy   "Transport, Communications, Energy"
label var services          "Services, Other"

// single categorical version of the four dummies (missing when none of them equals 1)
g sector = cond(manufacturing==1, 1, ///
           cond(retailhospitality==1, 2, ///
           cond(transportenergy==1, 3, ///
           cond(services==1, 4, .))))

* Tenure: months between the monthly date of Fing and the current month t
g jobstart = mofd(Fing)
format jobstart %tm
g tenure = t - jobstart

* Distribution of tenure by employment type (for the log)
foreach grp in self_empl empl {
	sum tenure if `grp'==1, det
}

* Indicator for at least 12 months of tenure (missing when tenure is missing)
g tenure_1yrs = (tenure>=12) if tenure!=.


* FIRM SIZE CATEGORIES
* Shift the existing codes up by one so that code 0 can denote firms with no employees
replace ndep_cat = cond(ndep==0, 0, ndep_cat+1)
label define ndep_cat 0 "no employees" 1 "1-4" 2 "5-9" 3 "10-19" 4 "20-49" 5 "50-249" 6 "250 plus", replace
label values ndep_cat ndep_cat

* FIRM SIZES WHEN FIRST OBS
* Value at the person's first observation (order equal to 1), copied to all of the person's rows
cap drop aux
bysort i: egen ndep_cat_1stobs = min(cond(order==1, ndep_cat, .))
label values ndep_cat_1stobs ndep_cat
label var ndep_cat_1stobs "Firm size when 1st observed"

bysort i: egen ndep_1stobs = min(cond(order==1, ndep, .))
label var ndep_1stobs "Firm size when 1st observed"

* Large/small firm when first observed
* larger = firm size category 3 or above (10 or more employees), 0 when the category is missing
g larger = ndep_cat>=3 & !missing(ndep_cat)
cap drop aux
* large_1stobs = 1 when the person's first observation is as an employee in a larger firm;
* everyone else, including the self-employed, gets small_1stobs = 1
bysort i: egen large_1stobs = max(larger==1 & order==1 & empl==1)
g small_1stobs = 1 - large_1stobs

* Event-time dummies, built from time_bcw (years relative to start of BCW)
* With the event window of years -5 to 7 kept earlier in this file, pre6 is 0 for every
* observation and pre25 coincides with pre2; pre24 leaves out year -5.
g post03 = time_bcw>=0 & time_bcw<4
g post4  = time_bcw>=4
g pre2   = time_bcw<=-2
g pre25  = time_bcw>=-5 & time_bcw<=-2
g pre6   = time_bcw<=-6
g post0  = time_bcw>=0
g pre24  = time_bcw>-5 & time_bcw<=-2

label var post03 "0-3 yrs. post start BCW"
label var post4  "4+ yrs. post start BCW"
label var pre2   "2+ yrs. pre start BCW"
label var pre25  "2-5 yrs. pre start BCW"
label var pre6   "6+ yrs. pre start BCW"
label var post0  "Post start BCW"
label var pre24  "2-4 yrs. pre start BCW"


* Event-time trend (in years), its square, and interactions with the period dummies
g bcwttrend        = timemonths_bcw/12
g bcwttrend2       = bcwttrend^2
g bcwttrend_post0  = bcwttrend*post0
g bcwttrend2_post0 = bcwttrend2*post0
g bcwttrend_post4  = bcwttrend*post4
g bcwttrend_pre6   = bcwttrend*pre6

label var bcwttrend        "Event-time trend"
label var bcwttrend2       "Event-time trend squared"
label var bcwttrend_post0  "Post BCW x event-time trend"
label var bcwttrend2_post0 "Post BCW x trend squared"
label var bcwttrend_post4  "4+ yrs. post BCW x event-time trend"
label var bcwttrend_pre6   "6+ yrs. pre BCW x event-time trend"


* Reports Min Contribution Base
* Earnings and the minimum contribution base are rounded to two decimals and stored before
* being compared. reports_ficto is defined for self-employed observations only:
* 1 when the two rounded values are equal, 0 otherwise.
g Wround=round(W,.01)
g rfictoround=round(rficto,.01)
g reports_ficto= Wround==rfictoround if self_empl==1


*Wages relative to MCB
* Ratio of reported earnings to the minimum contribution base
g Wficto=W/rficto
label var Wficto "Earnings/Self-emp. min."
		
label var year "Year"
label var cohort "Birth cohort"
* NOTE(review): a variable named age is not created in this file; it presumably comes from the
*   input data and survives the earlier keep through the age wildcard - confirm it exists.
label var age "Age"
label var prop_mempl "Prop. time employed" 
label var prop_mself_empl "Prop. time self employed" 
label var W "Reported earnings (1,000 UYP)"
	
* INTERACTIONS FOR DID
* Every event-time term is multiplied by a group indicator, giving three families of variables:
*   suffix _self_empl : term x self_empl
*   suffix _larger    : term x large_1stobs   (large firm, for employees)
*   suffix _small     : term x small_1stobs   (small firm, for employees)
local evterms bcwttrend post03 post4 pre2 pre25 pre24 pre6 post0 bcwttrend_post0 bcwttrend_pre6
local by_self_empl self_empl
local by_larger    large_1stobs
local by_small     small_1stobs

foreach sfx in self_empl larger small {
	foreach term of local evterms {
		g `term'_`sfx' = `term' * `by_`sfx''
	}
}

* Labels: self-employed interactions
label var post03_self_empl          "Self-employed x 0-3 post BCW"
label var post4_self_empl           "Self-employed x 4+ post BCW"
label var pre2_self_empl            "Self-employed x 2+ pre BCW"
label var pre25_self_empl           "Self-employed x 2-5 pre BCW"
label var pre24_self_empl           "Self-employed x 2-4 pre BCW"
label var pre6_self_empl            "Self-employed x 6+ pre BCW"
label var post0_self_empl           "Self-employed x post BCW"
label var bcwttrend_self_empl       "Self-employed x event-time trend"
label var bcwttrend_post0_self_empl "Self-employed x post BCW x trend"
label var bcwttrend_pre6_self_empl  "Self-employed x 6+ pre BCW x trend"

* Labels: large firm interactions
label var post03_larger          "Large firm x 0-3 post BCW"
label var post4_larger           "Large firm x 4+ post BCW"
label var pre2_larger            "Large firm x 2+ pre BCW"
label var pre25_larger           "Large firm x 2-5 pre BCW"
label var pre24_larger           "Large firm x 2-4 pre BCW"
label var pre6_larger            "Large firm x 6+ pre BCW"
label var post0_larger           "Large firm x post BCW"
label var bcwttrend_larger       "Large firm x event-time trend"
label var bcwttrend_post0_larger "Large firm x post BCW x trend"
label var bcwttrend_pre6_larger  "Large firm x 6+ pre BCW x trend"

* Labels: small firm interactions
label var post03_small          "Small firm x 0-3 post BCW"
label var post4_small           "Small firm x 4+ post BCW"
label var pre2_small            "Small firm x 2+ pre BCW"
label var pre25_small           "Small firm x 2-5 pre BCW"
label var pre24_small           "Small firm x 2-4 pre BCW"
label var pre6_small            "Small firm x 6+ pre BCW"
label var post0_small           "Small firm x post BCW"
label var bcwttrend_small       "Small firm x event-time trend"
label var bcwttrend_post0_small "Small firm x post BCW x trend"
label var bcwttrend_pre6_small  "Small firm x 6+ pre BCW x trend"


* Hours of work and wage per hour
* Monthly hours below the 5th percentile (r(p5) from the summary) are set to missing.
sum hrsmonth, det
replace hrsmonth=. if hrsmonth<r(p5)
* Earnings per hour; missing when hours are missing (division by missing) or not positive
g wagephr= W/hrsmonth if hrsmonth>0

* Write the final dataset to the clean directory under the name set at the top of the file
save "`dir_clean'/`dataname'", replace


clear all
exit
